Histogram Example

Author

Elmer Camargo

Published

January 15, 2024

Histogram Example

Using some supply chain data

import pandas as pd
from pandasql import sqldf

import plotly.graph_objects as go
from plotly.subplots import make_subplots
data = pd.read_excel("SupplyChainDataRev1.xlsx")
sqldf("select * from data limit 5")
SupplierID ForeignMainland Cycletime Location Class DefectRate Cost
0 7503961 U 49.0 D Gold 4.0 5.0
1 3209120 F 70.0 D Silver 5.0 8.0
2 1783896 M NaN C Silver 5.0 1.0
3 9060334 None 49.0 D Tin 7.0 7.0
4 3571171 F 66.0 F Silver 9.0 12.0
q1 = """
select
"Cycletime"
from
data
where
"Cycletime" is not null
"""
sqldf(q1).describe()
Cycletime
count 4664.000000
mean 53.719554
std 13.301960
min 18.000000
25% 44.000000
50% 54.000000
75% 64.000000
max 79.000000
q3 = """
with bin_data as (
select
case
    when "Cycletime" >= 15 and "Cycletime" <= 19 then '15-19'
    when "Cycletime" >= 20 and "Cycletime" <= 24 then '20-24'
    when "Cycletime" >= 25 and "Cycletime" <= 29 then '25-29'
    when "Cycletime" >= 30 and "Cycletime" <= 34 then '30-34'
    when "Cycletime" >= 35 and "Cycletime" <= 39 then '35-39'
    when "Cycletime" >= 40 and "Cycletime" <= 44 then '40-44'
    when "Cycletime" >= 45 and "Cycletime" <= 49 then '45-49'
    when "Cycletime" >= 50 and "Cycletime" <= 54 then '50-54'
    when "Cycletime" >= 55 and "Cycletime" <= 59 then '55-59'
    when "Cycletime" >= 60 and "Cycletime" <= 64 then '60-64'
    when "Cycletime" >= 65 and "Cycletime" <= 69 then '65-69'
    when "Cycletime" >= 70 and "Cycletime" <= 74 then '70-74'
    when "Cycletime" >= 75 and "Cycletime" <= 79 then '75-79'
    else '>80'
end as "bins",
count(*) as frequency
from
data
where "Cycletime" is not null
group by "bins"
order by "bins")

select
*,
cast(sum("frequency") over(order by "bins") as float) / cast(sum("frequency") over() as float) as "cumulative_percent"
from bin_data
"""
bin_data = sqldf(q3)
sqldf(q3)
bins frequency cumulative_percent
0 15-19 5 0.001072
1 20-24 18 0.004931
2 25-29 94 0.025086
3 30-34 269 0.082762
4 35-39 410 0.170669
5 40-44 441 0.265223
6 45-49 570 0.387436
7 50-54 632 0.522942
8 55-59 548 0.640437
9 60-64 524 0.752787
10 65-69 493 0.858491
11 70-74 384 0.940823
12 75-79 276 1.000000
import plotly.io as pio
pio.renderers.default = "notebook+pdf"
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(x=bin_data["bins"], y=bin_data["frequency"],marker_color='#B80000',opacity=0.75, name="Frequency")
    )

fig.add_trace(
    go.Scatter(x=bin_data["bins"], y=bin_data["cumulative_percent"], name="Cumulative Percentage", marker_color="#FF9800"),
    secondary_y=True,
)

fig.update_xaxes(title_text="Bins")
fig.update_yaxes(title_text="<b>Frequency</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Cumulative Percentage</b>", secondary_y=True)

fig.update_layout(title='Binned CycleTime')
fig.show()